The model and data at healthdata.org include start and stop dates for various categories of lockdown measures (e.g. closing schools, any transit restrictions, closures of non-essential businesses) as well as estimates of current cases for many countries (and some sub-country regions). If we approximate the dynamics as exponential processes (like the SIR family of models but without saturation) we can estimate the current rate of spread and correlate that with current lockdown measures.
Some future possibilities:
# Notebook setup: holoviews (bokeh backend) for plotting, pandas/numpy for
# data wrangling.  pymc3 and permutations are imported but not used in the
# cells shown here.
from itertools import permutations, product
import holoviews as hv
import numpy as np
import pandas as pd
import pymc3 as pm
hv.notebook_extension('bokeh')
# IPython line magic (not plain Python): default options for Overlay elements
%opts Overlay [aspect=5/3, responsive=True]
# Load the IHME daily time series and the per-location summary statistics,
# parsing every date-valued column up front.
# NOTE(review): the two files come from different snapshot directories
# (2020_05_28 vs 2020_05_23.03) — confirm this mix is intentional.
healthdata = pd.read_csv('2020_05_28/Hospitalization_all_locs.csv', parse_dates=['date'])

# Date-valued columns in the summary file: projected peak days for
# bed/ICU/ventilator demand, plus start/end dates of each countermeasure.
_stats_date_columns = [
    'peak_bed_day_mean', 'peak_bed_day_lower', 'peak_bed_day_upper',
    'peak_icu_bed_day_mean', 'peak_icu_bed_day_lower', 'peak_icu_bed_day_upper',
    'peak_vent_day_mean', 'peak_vent_day_lower', 'peak_vent_day_upper',
    'travel_limit_start_date', 'travel_limit_end_date',
    'stay_home_start_date', 'stay_home_end_date',
    'educational_fac_start_date', 'educational_fac_end_date',
    'any_gathering_restrict_start_date', 'any_gathering_restrict_end_date',
    'any_business_start_date', 'any_business_end_date',
    'all_non-ess_business_start_date', 'all_non-ess_business_end_date',
]
stats = pd.read_csv('2020_05_23.03/Summary_stats_all_locs.csv',
                    parse_dates=_stats_date_columns)
# We will melt the start and end dates and drop the missing data so we can determine
# whether or not a policy was in place using an asof merge to the most recent change.
# Result: one row per (location, countermeasure, start-or-end event, date).
date_columns = [c for c in stats.columns if 'date' in c]
countermeasures = (
    stats
    .melt(id_vars=['location_name'], value_vars=date_columns, value_name='date')
    .dropna()
)

# 'variable' looks like e.g. 'stay_home_start_date'; the countermeasure type is
# everything before the trailing '<start|end>_date' suffix, with '-' normalized
# to '_' so splitting is uniform.
def _countermeasure_type(variable_name):
    return '_'.join(variable_name.replace('-', '_').split('_')[:-2])

countermeasures['type'] = countermeasures['variable'].map(_countermeasure_type)
countermeasure_types = sorted(set(countermeasures['type']))
# A 'start' event means the measure became enforced; an 'end' event means lifted.
countermeasures['enforced'] = countermeasures['variable'].map(lambda v: v.split('_')[-2] == 'start')
countermeasures = countermeasures.drop(columns=['variable']).sort_values('date')
countermeasure_types
merged = healthdata.copy()
# healthdata uses projections for the past two weeks just as they do for future dates
# as a mitigation for lag in reporting. To look only at estimates for days with data,
# discard anything newer than two weeks before the latest data.
merged.dropna(subset=['confirmed_infections'], inplace=True)
merged = merged.loc[merged['date'] < merged['date'].max() - pd.to_timedelta('2w')]
# Regions that are decomposed are stored here, too, but without countermeasure data.
# Therefore we discard the groups and keep their subdivisions
group_names = [
    'Brazil',
    'Canada',
    'Germany',
    'Italy',
    'Mexico',
    'Spain',
    'United States of America',
]
# isin is the idiomatic vectorized membership test (was apply(__contains__))
merged = merged.loc[~merged['location_name'].isin(group_names)]
# to reduce noise, only investigate growth once a location has at least N infections
N = 200
merged = merged.loc[merged['est_infections_mean'] > N]
# Drop locations with fewer than MIN_SAMPLES rows remaining after the filters
# above.  (The sample cutoff was previously a literal 40 while the comment
# referred to N; it is a distinct threshold, so it gets its own name.)
MIN_SAMPLES = 40
# groupby+transform filters on per-location counts directly, avoiding the
# merge that previously duplicated 'V1' into 'V1_x'/'V1_y' just for this test
sample_counts = merged.groupby('location_name')['V1'].transform('count')
merged = merged.loc[sample_counts >= MIN_SAMPLES]
merged.sort_values('date', inplace=True)
# Attach each countermeasure's on/off state to every (location, date) row.
# merge_asof selects the most recent change on or before each observation
# date, so the new boolean column reflects whether the measure was active
# that day.  Both frames are already sorted by 'date' as merge_asof requires.
for countermeasure_type, subset in countermeasures.groupby('type'):
    subset = subset.drop(columns='type').rename(columns={'enforced': countermeasure_type})
    merged = pd.merge_asof(merged, subset, on='date', by='location_name')
    # Rows before a location's first recorded change come back NaN: treat as
    # "not enforced".  Assign instead of inplace-fillna on the column slice —
    # the latter is chained assignment, which is deprecated and a silent
    # no-op under pandas copy-on-write.
    merged[countermeasure_type] = merged[countermeasure_type].fillna(False)
# Per-location day-over-day changes of the infection estimates (diff within
# each location so a country boundary never produces a bogus step).
estimate_cols = ['est_infections_mean', 'est_infections_lower', 'est_infections_upper']
diffs = merged.groupby('location_name')[estimate_cols].diff()
diffs.columns = [f'diff_{c}' for c in estimate_cols]
merged = pd.concat([merged, diffs], axis=1)
# Growth-rate proxy: today's new infections as a fraction of the current total.
merged['fractional_diff_est_infections_mean'] = (
    merged['diff_est_infections_mean'] / merged['est_infections_mean']
)
# Calendar features plus a pure-noise column (useful as a null reference).
merged['day_of_year'] = merged['date'].dt.dayofyear
merged['day_of_week'] = merged['date'].dt.dayofweek
merged['random'] = np.random.normal(size=len(merged))
print(len(healthdata), len(merged))
# Number of countermeasure types simultaneously active on each row: sum the
# boolean indicator columns (sum() starts at 0 and adds each series in turn,
# exactly as the original accumulation loop did).
merged['num_active'] = sum(merged[ct] for ct in countermeasure_types)
# Overview plot: each point is one (location, day); y is the day-over-day
# fractional growth of estimated infections, colored by how many
# countermeasure types were active that day.
# NOTE(review): jitter=5e7 spreads points along the datetime x-axis in the
# axis' native units — confirm the magnitude is intentional.
(
    hv.Scatter(merged, 'date', ['fractional_diff_est_infections_mean', 'num_active'])
    .options(alpha=0.3, jitter=5e7, legend_position='right', aspect=5/3, responsive=True,
             colorbar=True, cmap='viridis', color=hv.dim('num_active'))
)
# One overlay per countermeasure type: the growth-rate scatter split into the
# rows where that measure was inactive (label 'False') versus active ('True').
# location_name and all countermeasure columns are carried as value dims so
# the hover tool can show them.  .layout().cols(1) stacks every overlay
# vertically instead of presenting a selection widget.
hv.HoloMap({
    countermeasure_type:
    hv.Scatter(
        merged.loc[~merged[countermeasure_type]],
        'date', ['fractional_diff_est_infections_mean', 'location_name'] + countermeasure_types,
        label='False'
    ).options(alpha=0.3, jitter=5e7, aspect=5/3, responsive=True, tools=['hover']) *
    hv.Scatter(
        merged.loc[merged[countermeasure_type]],
        'date', ['fractional_diff_est_infections_mean', 'location_name'] + countermeasure_types,
        label='True'
    ).options(alpha=0.3, jitter=5e7, tools=['hover'])
    for countermeasure_type in countermeasure_types
}, 'Countermeasure').layout().cols(1)
# Pairwise grid over all countermeasure combinations.  The indicator columns
# only take the values 0/1, so points are nudged to make structure visible.
X = merged.copy()
# fresh uniform noise in [-1, 1), reused for horizontal jitter below
X['random'] = np.random.uniform(-1, 1, size=len(X))
# shuffle draw order so overplotting doesn't systematically favor any subset
X.sort_values('random', inplace=True)
plots = {}
for a, b in product(countermeasure_types, countermeasure_types):
    x = X.copy()
    # b is the y-axis: offset by a squashed copy of the growth rate so the
    # vertical spread inside each 0/1 band also encodes the color variable
    x[b] += 0.4 * np.tanh(5.0 * x['fractional_diff_est_infections_mean'])
    if a != b:
        # off-diagonal panels: jitter the x-axis so coincident points separate
        x[a] += 0.36 * x['random']
    plots[(a, b)] = (
        hv.Scatter(x, [a, b], 'fractional_diff_est_infections_mean')
        .options(color=hv.dim('fractional_diff_est_infections_mean'), alpha=0.05, cmap='viridis')
    )
# Assemble the panels into a grid and clip the shared growth-rate range to
# (-0.1, 0.3) so outliers don't wash out the color map.
(
    hv.GridSpace(plots, sort=True)
    .options(xrotation=45, yrotation=45)
    .redim.range(fractional_diff_est_infections_mean=(-0.1, 0.3))
)